{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.preprocessing import OneHotEncoder,Binarizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "import numpy as np\n",
    "from sklearn.linear_model import LogisticRegression,LinearRegression\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.metrics import accuracy_score,confusion_matrix,mean_squared_error,recall_score,roc_auc_score,precision_score,f1_score\n",
    "from sklearn.model_selection import GridSearchCV,learning_curve\n",
    "import joblib\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import RocCurveDisplay\n",
    "from sklearn import metrics\n",
    "from sklearn.metrics import precision_recall_curve\n",
    "from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer\n",
    "import jieba\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.impute import SimpleImputer\n",
    "from sklearn.utils import shuffle"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "       tag_104013  tag_104014  tag_104016  tag_104017  tag_104018  tag_104019  \\\n0               2           0           0           2           0           0   \n1               2           0           0           0           2           0   \n2               2           0           0           0           0           2   \n3               2           0           0           0           2           0   \n4               1           2           0           2           0           0   \n...           ...         ...         ...         ...         ...         ...   \n20378           1           0           0           0           1           0   \n20379           2           0           0           2           0           0   \n20380           0           2           1           2           1           0   \n20381           2           0           0           0           2           0   \n20382           2           0           0           0           2           0   \n\n       tag_104020  tag_104563  tag_112950  tag_121711  ...  r6  r7  r8  r9  \\\n0               0           0           1           0  ...   0   0   0   0   \n1               0           0           0           0  ...   0   1   0   0   \n2               0           0           0           0  ...   0   0   0   0   \n3               0           0           0           0  ...   0   0   0   0   \n4               0           0           0           0  ...   0   0   0   0   \n...           ...         ...         ...         ...  ...  ..  ..  ..  ..   \n20378           0           0           0           0  ...   1   0   0   0   \n20379           0           1           0           0  ...   0   0   0   0   \n20380           0           0           0           0  ...   1   0   0   0   \n20381           0           0           1           0  ...   0   0   0   0   \n20382           0           0           0           0  ...   0   0   0   0   \n\n       g1  g2  g3  g4  g5  g6  \n0       0   0   1   0   0   0  \n1       0   1   0   0   0   0  \n2       0   0   0   1   0   0  \n3       0   1   0   0   0   0  \n4       0   0   1   0   0   0  \n...    ..  ..  ..  ..  ..  ..  \n20378   0   0   0   0   0   1  \n20379   0   0   1   0   0   0  \n20380   0   0   1   0   0   0  \n20381   0   1   0   0   0   0  \n20382   0   1   0   0   0   0  \n\n[20383 rows x 326 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>tag_104013</th>\n      <th>tag_104014</th>\n      <th>tag_104016</th>\n      <th>tag_104017</th>\n      <th>tag_104018</th>\n      <th>tag_104019</th>\n      <th>tag_104020</th>\n      <th>tag_104563</th>\n      <th>tag_112950</th>\n      <th>tag_121711</th>\n      <th>...</th>\n      <th>r6</th>\n      <th>r7</th>\n      <th>r8</th>\n      <th>r9</th>\n      <th>g1</th>\n      <th>g2</th>\n      <th>g3</th>\n      <th>g4</th>\n      <th>g5</th>\n      <th>g6</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>20378</th>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>20379</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>20380</th>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>20381</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>20382</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n<p>20383 rows × 326 columns</p>\n</div>"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"C:\\\\Users\\\\Administrator\\\\Desktop\\\\月考练习算法题 (2)\\\\月考练习算法题\\\\第4套（修改3）\\\\专高6月考-04附件\\\\sample.csv\")\n",
    "df\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "outputs": [
    {
     "data": {
      "text/plain": "   tag_104013  tag_104014  tag_104016  tag_104017  tag_104018  tag_104019  \\\n0           2           0           0           2           0           0   \n1           2           0           0           0           2           0   \n2           2           0           0           0           0           2   \n3           2           0           0           0           2           0   \n4           1           2           0           2           0           0   \n\n   tag_104020  tag_104563  tag_112950  tag_121711  ...  r6  r7  r8  r9  g1  \\\n0           0           0           1           0  ...   0   0   0   0   0   \n1           0           0           0           0  ...   0   1   0   0   0   \n2           0           0           0           0  ...   0   0   0   0   0   \n3           0           0           0           0  ...   0   0   0   0   0   \n4           0           0           0           0  ...   0   0   0   0   0   \n\n   g2  g3  g4  g5  g6  \n0   0   1   0   0   0  \n1   1   0   0   0   0  \n2   0   0   1   0   0  \n3   1   0   0   0   0  \n4   0   1   0   0   0  \n\n[5 rows x 326 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>tag_104013</th>\n      <th>tag_104014</th>\n      <th>tag_104016</th>\n      <th>tag_104017</th>\n      <th>tag_104018</th>\n      <th>tag_104019</th>\n      <th>tag_104020</th>\n      <th>tag_104563</th>\n      <th>tag_112950</th>\n      <th>tag_121711</th>\n      <th>...</th>\n      <th>r6</th>\n      <th>r7</th>\n      <th>r8</th>\n      <th>r9</th>\n      <th>g1</th>\n      <th>g2</th>\n      <th>g3</th>\n      <th>g4</th>\n      <th>g5</th>\n      <th>g6</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 326 columns</p>\n</div>"
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "data": {
      "text/plain": "<bound method DataFrame.info of        tag_104013  tag_104014  tag_104016  tag_104017  tag_104018  tag_104019  \\\n0               2           0           0           2           0           0   \n1               2           0           0           0           2           0   \n2               2           0           0           0           0           2   \n3               2           0           0           0           2           0   \n4               1           2           0           2           0           0   \n...           ...         ...         ...         ...         ...         ...   \n20378           1           0           0           0           1           0   \n20379           2           0           0           2           0           0   \n20380           0           2           1           2           1           0   \n20381           2           0           0           0           2           0   \n20382           2           0           0           0           2           0   \n\n       tag_104020  tag_104563  tag_112950  tag_121711  ...  r6  r7  r8  r9  \\\n0               0           0           1           0  ...   0   0   0   0   \n1               0           0           0           0  ...   0   1   0   0   \n2               0           0           0           0  ...   0   0   0   0   \n3               0           0           0           0  ...   0   0   0   0   \n4               0           0           0           0  ...   0   0   0   0   \n...           ...         ...         ...         ...  ...  ..  ..  ..  ..   \n20378           0           0           0           0  ...   1   0   0   0   \n20379           0           1           0           0  ...   0   0   0   0   \n20380           0           0           0           0  ...   1   0   0   0   \n20381           0           0           1           0  ...   0   0   0   0   \n20382           0           0           0           0  ...   0   0   0   0   \n\n       g1  g2  g3  g4  g5  g6  \n0       0   0   1   0   0   0  \n1       0   1   0   0   0   0  \n2       0   0   0   1   0   0  \n3       0   1   0   0   0   0  \n4       0   0   1   0   0   0  \n...    ..  ..  ..  ..  ..  ..  \n20378   0   0   0   0   0   1  \n20379   0   0   1   0   0   0  \n20380   0   0   1   0   0   0  \n20381   0   1   0   0   0   0  \n20382   0   1   0   0   0   0  \n\n[20383 rows x 326 columns]>"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.info\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "outputs": [],
   "source": [
    "# df = shuffle(df)\n",
    "# df\n",
    "\n",
    "# df = df.sample(frac = 1)\n",
    "# df"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "data": {
      "text/plain": "       tag_104013  tag_104014  tag_104016  tag_104017  tag_104018  tag_104019  \\\n0               2           0           0           2           0           0   \n1               2           0           0           0           2           0   \n2               2           0           0           0           0           2   \n3               2           0           0           0           2           0   \n4               1           2           0           2           0           0   \n...           ...         ...         ...         ...         ...         ...   \n20378           1           0           0           0           1           0   \n20379           2           0           0           2           0           0   \n20380           0           2           1           2           1           0   \n20381           2           0           0           0           2           0   \n20382           2           0           0           0           2           0   \n\n       tag_104020  tag_104563  tag_112950  tag_121711  ...  r6  r7  r8  r9  \\\n0               0           0           1           0  ...   0   0   0   0   \n1               0           0           0           0  ...   0   1   0   0   \n2               0           0           0           0  ...   0   0   0   0   \n3               0           0           0           0  ...   0   0   0   0   \n4               0           0           0           0  ...   0   0   0   0   \n...           ...         ...         ...         ...  ...  ..  ..  ..  ..   \n20378           0           0           0           0  ...   1   0   0   0   \n20379           0           1           0           0  ...   0   0   0   0   \n20380           0           0           0           0  ...   1   0   0   0   \n20381           0           0           1           0  ...   0   0   0   0   \n20382           0           0           0           0  ...   0   0   0   0   \n\n       g1  g2  g3  g4  g5  g6  \n0       0   0   1   0   0   0  \n1       0   1   0   0   0   0  \n2       0   0   0   1   0   0  \n3       0   1   0   0   0   0  \n4       0   0   1   0   0   0  \n...    ..  ..  ..  ..  ..  ..  \n20378   0   0   0   0   0   1  \n20379   0   0   1   0   0   0  \n20380   0   0   1   0   0   0  \n20381   0   1   0   0   0   0  \n20382   0   1   0   0   0   0  \n\n[20383 rows x 325 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>tag_104013</th>\n      <th>tag_104014</th>\n      <th>tag_104016</th>\n      <th>tag_104017</th>\n      <th>tag_104018</th>\n      <th>tag_104019</th>\n      <th>tag_104020</th>\n      <th>tag_104563</th>\n      <th>tag_112950</th>\n      <th>tag_121711</th>\n      <th>...</th>\n      <th>r6</th>\n      <th>r7</th>\n      <th>r8</th>\n      <th>r9</th>\n      <th>g1</th>\n      <th>g2</th>\n      <th>g3</th>\n      <th>g4</th>\n      <th>g5</th>\n      <th>g6</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>20378</th>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>20379</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>20380</th>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>20381</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>20382</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n<p>20383 rows × 325 columns</p>\n</div>"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = df.drop('is_act',axis=1)\n",
    "y = df[\"is_act\"]\n",
    "X\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "data": {
      "text/plain": "       tag_104013  tag_104014  tag_104016  tag_104017  tag_104018  tag_104019  \\\n10824           0           2           0           2           0           0   \n1047            1           2           1           2           0           0   \n12985           2           0           0           1           2           0   \n7443            2           0           0           2           1           0   \n17823           0           2           0           0           2           0   \n...           ...         ...         ...         ...         ...         ...   \n11284           0           2           0           0           2           0   \n11964           0           2           0           0           2           0   \n5390            2           0           0           1           2           0   \n860             2           0           0           0           2           0   \n15795           2           0           0           2           0           0   \n\n       tag_104020  tag_104563  tag_112950  tag_121711  ...  r6  r7  r8  r9  \\\n10824           0           0           0           1  ...   0   0   1   0   \n1047            0           0           0           0  ...   0   0   0   0   \n12985           0           0           0           0  ...   0   0   0   0   \n7443            0           0           0           0  ...   0   0   0   0   \n17823           0           1           0           0  ...   0   0   0   0   \n...           ...         ...         ...         ...  ...  ..  ..  ..  ..   \n11284           0           0           0           1  ...   0   0   1   0   \n11964           0           0           0           0  ...   0   0   0   0   \n5390            0           0           0           0  ...   0   0   0   0   \n860             0           0           0           0  ...   0   0   0   0   \n15795           0           0           0           0  ...   0   0   0   0   \n\n       g1  g2  g3  g4  g5  g6  \n10824   0   0   1   0   0   0  \n1047    0   0   1   0   0   0  \n12985   0   1   0   0   0   0  \n7443    0   0   1   0   0   0  \n17823   0   1   0   0   0   0  \n...    ..  ..  ..  ..  ..  ..  \n11284   0   1   0   0   0   0  \n11964   0   1   0   0   0   0  \n5390    0   1   0   0   0   0  \n860     0   1   0   0   0   0  \n15795   0   0   1   0   0   0  \n\n[13656 rows x 325 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>tag_104013</th>\n      <th>tag_104014</th>\n      <th>tag_104016</th>\n      <th>tag_104017</th>\n      <th>tag_104018</th>\n      <th>tag_104019</th>\n      <th>tag_104020</th>\n      <th>tag_104563</th>\n      <th>tag_112950</th>\n      <th>tag_121711</th>\n      <th>...</th>\n      <th>r6</th>\n      <th>r7</th>\n      <th>r8</th>\n      <th>r9</th>\n      <th>g1</th>\n      <th>g2</th>\n      <th>g3</th>\n      <th>g4</th>\n      <th>g5</th>\n      <th>g6</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>10824</th>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1047</th>\n      <td>1</td>\n      <td>2</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>12985</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>7443</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>17823</th>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>11284</th>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>11964</th>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>5390</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>860</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>15795</th>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n<p>13656 rows × 325 columns</p>\n</div>"
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test =train_test_split(X,y,test_size=0.33,random_state=42,shuffle=True)\n",
    "X_train"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [
    {
     "data": {
      "text/plain": "RandomForestClassifier(max_depth=50)",
      "text/html": "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(max_depth=50)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(max_depth=50)</pre></div></div></div></div></div>"
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#随机森林建模\n",
    "model_rf = RandomForestClassifier(max_depth=50, min_samples_split=2, max_leaf_nodes=None)\n",
    "model_rf"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "#逻辑回归建模\n",
    "model_lr = LogisticRegression(C=1.0, max_iter=100, n_jobs=1)\n",
    "model_lr"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n",
     "is_executing": true
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "#randomTree\n",
    "rf_params = {\n",
    "    'n_estimators': [100,150,200],\n",
    "    'max_depth': [50, 60, 70],\n",
    "    'max_samples': [0.6, 0.9, 1]\n",
    "}\n",
    "rf_gridCV = GridSearchCV(model_rf, param_grid=rf_params, cv=5, scoring=None)\n",
    "\n",
    "rf_gridCV.fit(X_train, y_train)\n",
    "\n",
    "print(rf_gridCV.best_params_)\n",
    "#{'max_depth': 60, 'max_samples': 0.9, 'n_estimators': 300}\n",
    "\n",
    "best_rf_model = rf_gridCV.best_estimator_\n",
    "best_rf_pred = best_rf_model.predict(X_test)\n",
    "best_acc_rf = accuracy_score(y_test, best_rf_pred)\n",
    "best_acc_rf\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n",
     "is_executing": true
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'X_train' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "Cell \u001B[1;32mIn[11], line 9\u001B[0m\n\u001B[0;32m      2\u001B[0m lr_params \u001B[38;5;241m=\u001B[39m {\n\u001B[0;32m      3\u001B[0m     \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mfit_intercept\u001B[39m\u001B[38;5;124m'\u001B[39m: [\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mTrue\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mFalse\u001B[39m\u001B[38;5;124m'\u001B[39m],\n\u001B[0;32m      4\u001B[0m     \u001B[38;5;124m'\u001B[39m\u001B[38;5;124msolver\u001B[39m\u001B[38;5;124m'\u001B[39m: [\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mliblinear\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124msag\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mlbfgs\u001B[39m\u001B[38;5;124m'\u001B[39m],\n\u001B[0;32m      5\u001B[0m     \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmax_iter\u001B[39m\u001B[38;5;124m'\u001B[39m: [\u001B[38;5;241m10\u001B[39m, \u001B[38;5;241m30\u001B[39m, \u001B[38;5;241m40\u001B[39m],\n\u001B[0;32m      6\u001B[0m }\n\u001B[0;32m      7\u001B[0m lr_gridCV \u001B[38;5;241m=\u001B[39m GridSearchCV(model_lr, param_grid\u001B[38;5;241m=\u001B[39mlr_params, cv\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m5\u001B[39m, scoring\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m)\n\u001B[1;32m----> 9\u001B[0m lr_gridCV\u001B[38;5;241m.\u001B[39mfit(\u001B[43mX_train\u001B[49m, y_train)\n\u001B[0;32m     11\u001B[0m \u001B[38;5;28mprint\u001B[39m(lr_gridCV\u001B[38;5;241m.\u001B[39mbest_params_)\n\u001B[0;32m     12\u001B[0m \u001B[38;5;66;03m#{'fit_intercept': 'True', 'max_iter': 30, 'solver': 'liblinear'}\u001B[39;00m\n",
      "\u001B[1;31mNameError\u001B[0m: name 'X_train' is not defined"
     ]
    }
   ],
   "source": [
    "#LogisticRegression\n",
    "lr_params = {\n",
    "    'fit_intercept': ['True', 'False'],\n",
    "    'solver': ['liblinear', 'sag', 'lbfgs'],\n",
    "    'max_iter': [10, 30, 40],\n",
    "}\n",
    "lr_gridCV = GridSearchCV(model_lr, param_grid=lr_params, cv=5, scoring=None)\n",
    "\n",
    "lr_gridCV.fit(X_train, y_train)\n",
    "\n",
    "print(lr_gridCV.best_params_)\n",
    "#{'fit_intercept': 'True', 'max_iter': 30, 'solver': 'liblinear'}\n",
    "\n",
    "best_lr_model = lr_gridCV.best_estimator_\n",
    "best_lr_pred = best_lr_model.predict(X_test)\n",
    "best_acc_lr = accuracy_score(y_test, best_lr_pred)\n",
    "best_acc_lr\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "print(best_lr_model.coef_)\n",
    "\n",
    "print(best_rf_model.feature_importances_)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "lr_pred = best_lr_model.predict(X_test)\n",
    "rf_pred = best_rf_model.predict(X_test)\n",
    "\n",
    "#评估 acc\n",
    "lr_acc = accuracy_score(y_test, lr_pred)\n",
    "rf_acc = accuracy_score(y_test, rf_pred)\n",
    "#precision\n",
    "lr_precision = precision_score(y_test, lr_pred)\n",
    "rf_precision = precision_score(y_test, rf_pred)\n",
    "#recall\n",
    "lr_recall = recall_score(y_test, lr_pred)\n",
    "rf_recall = recall_score(y_test, rf_pred)\n",
    "#f1\n",
    "lr_f1 = f1_score(y_test, lr_pred)\n",
    "rf_f1 = f1_score(y_test, rf_pred)\n",
    "print(f'logiect--   acc:{lr_acc} , precision:{lr_precision}, recall:{lr_recall} ,f1:{lr_f1}')\n",
    "print(f'randomtree--   acc:{rf_acc} , precision:{rf_precision}, recall:{rf_recall} ,f1:{rf_f1}')\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n",
     "is_executing": true
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "train_sizes=np.linspace(0.1,1.0,10)\n",
    "\n",
    "train_sizes, train_scores, test_scores = learning_curve(RandomForestClassifier(),X,y,train_sizes=train_sizes,\n",
    "               cv=5,n_jobs=-1)\n",
    "\n",
    "train_mean = np.mean(train_scores, axis=1)\n",
    "train_std = np.std(train_scores, axis=1)\n",
    "\n",
    "test_mean = np.mean(test_scores, axis=1)\n",
    "test_std = np.std(test_scores, axis=1)\n",
    "\n",
    "# 绘制学习曲线\n",
    "plt.figure(figsize=(10,6))\n",
    "\n",
    "plt.plot(train_sizes,train_mean,color='blue',label='Training score')\n",
    "plt.fill_between(train_sizes,train_mean - train_std,train_mean + train_std,alpha=0.1,color=\"blue\")\n",
    "\n",
    "plt.plot(train_sizes,test_mean,color='green',label=\"Cross-validation score\")\n",
    "plt.fill_between(train_sizes,test_mean - test_std,test_mean + test_std,alpha=0.1, color=\"green\")\n",
    "plt.grid()\n",
    "plt.xlabel(\"Training examples\")\n",
    "plt.ylabel(\"fit_times\")\n",
    "plt.title(\"style_cove\")\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n",
     "is_executing": true
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "fpr, tpr, thresholds = metrics.roc_curve(y_test, lr_pred)\n",
    "roc_auc = metrics.auc(fpr, tpr)\n",
    "display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,                                          estimator_name='example estimator')\n",
    "display.plot()  # doctest: +SKIP\n",
    "plt.show()   "
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n",
     "is_executing": true
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "lr_pred = best_lr_model.predict(X_test)\n",
    "rf_pred = best_rf_model.predict(X_test)\n",
    "\n",
    "\n",
    "joblib.dump(best_lr_model,\"best_lr_model.pkl\")\n",
    "joblib.dump(best_rf_model,\"best_rf_model.pkl\")"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}